clear all

* Directory settings.
* odir is the project root; the other globals are sub-folder names that are
* resolved relative to it after the cd below.
global odir "/Users/sangdong/Documents/KoPDP_2020update/KoPDP2020" /* current folder */
global raw "raw" /* raw files newly downloaded in KoPDP 2020 project */
global old "old" /* raw files from KoPDP 2019 */
global temp "temp" /* temporary files, supposed to be erased at the end of code */
global result "result" /* desired results */
cd "$odir"

* Create the folders this script writes to. -capture- swallows the error that
* mkdir raises when the folder already exists.
capture mkdir "$temp"
capture mkdir "$result"


* Assignee
* Builds $result/assignee_updated from the previous release ($old/assignee,
* rows with derived year < 2010) plus the newly downloaded CSV, and attaches
* kiprisidH through the two lookup tables kip_harm and kip_harm2.

* --- newly downloaded records
import delimited $raw/assignee_8417_2020.csv, encoding(UTF-8) clear
rename v1-v6 (appnum order engname kiprisid country address)
destring appnum, replace ignore("-")
format appnum kiprisid %15.0g
save $temp/assg8417_2020, replace	

* --- previous release
* The year is derived from the number itself: floor(appnum/1e7) - 1e5,
* e.g. 1020100012345 -> 2010. Rows from 2010 onwards (and rows where the
* expression is missing) are dropped before appending the new records.
use $old/assignee, clear
drop if floor(appnum/1e7) - 1e5 >= 2010
append using $temp/assg8417_2020
format kiprisid appnum %15.0g
format engname country address %20s
save $temp/temp, replace

* --- kiprisid - kiprisidH matching table
* One row per kiprisid. An id that has no entry in a lookup table falls back
* to the value it had before that lookup.
keep kiprisid
duplicates drop

merge 1:1 kiprisid using $old/kip_harm, keep(master match)
replace kip_harm = kiprisid if _merge == 1
drop _merge

merge m:1 kip_harm using $old/kip_harm2, keep(master match)
replace kiprisidH = kip_harm if _merge == 1
drop _merge

save $temp/temp1, replace

* --- attach kiprisidH to the combined records and write the result
use $temp/temp, clear
merge m:1 kiprisid using $temp/temp1, keepusing(kiprisidH) nogenerate
order appnum order kiprisid kiprisidH
save $result/assignee_updated, replace



* Basic Info
* Builds $result/basicinfo_updated from the previous release ($old/basicinfo,
* rows with derived year < 2010) plus the newly downloaded CSV.

* --- newly downloaded records
import delimited $raw/basic_8417_2020.csv, encoding(UTF-8) clear
rename v1-v8 (appnum appdate opennum opendate registernum registerdate nclaim finaldisposal)
drop opennum opendate
destring appnum appdate registernum registerdate nclaim , replace ignore(- .)
format appnum registernum %15.0g
save $temp/basic8417_2020, replace

* --- previous release
* Year is derived as floor(appnum/1e7) - 1e5 (e.g. 1020100012345 -> 2010);
* rows from 2010 onwards, or with a missing value, are dropped.
use $old/basicinfo, clear
drop if floor(appnum/1e7) - 1e5 >= 2010
destring appdate registernum registerdate nclaim , replace ignore(- .)
format registerdate %15.0g

append using $temp/basic8417_2020

* After -append- the display formats of the master (old) data win, so the
* %15.0g format set on the new records above is lost. Re-apply it here so the
* long identifiers are not shown in scientific notation in the result file.
* This affects display only, not the stored values.
format appnum registernum %15.0g

save $result/basicinfo_updated, replace



* IPC
* Builds $result/ipc_updated from the previous release ($old/ipc, rows with
* derived year < 2010) plus the newly downloaded CSV.

* --- newly downloaded records
import delimited $raw/ipc_8417_2020.csv, encoding(UTF-8) clear
rename v1-v4 (appnum order ipc ipcdate)
* Parentheses, blanks, hyphens and dots are stripped before conversion.
destring appnum ipcdate, replace ignore(( ) - .)
save $temp/ipc8417_2020, replace

* --- previous release
* Year is derived as floor(appnum/1e7) - 1e5 (e.g. 1020100012345 -> 2010);
* rows from 2010 onwards, or with a missing value, are dropped.
use $old/ipc, clear
drop if floor(appnum/1e7) - 1e5 >= 2010
destring ipcdate, replace ignore(- .)

append using $temp/ipc8417_2020
save $result/ipc_updated, replace



* Numbers
* Builds $result/numbers (application / publication / registration numbers and
* dates) and two lookup tables in $temp that map a publication number
* (numbersA) or a registration number (numbersB) to an application number.

* --- newly downloaded records
* NOTE: this overwrites $temp/basic8417_2020 written by the Basic Info section;
* here the open* columns are kept and nclaim / finaldisposal are dropped.
import delimited $raw/basic_8417_2020.csv, encoding(UTF-8) clear
rename v1-v8 (appnum appdate opennum opendate registernum registerdate nclaim finaldisposal)
destring appnum appdate opennum opendate registernum registerdate nclaim , replace ignore(- .)
format appnum opennum registernum %15.0g
keep app* open* register*
save $temp/basic8417_2020, replace

* --- previous release
import delimited $old/numbers.csv, encoding(UTF-8) clear
rename v1-v8 (appnum appdate opennum opendate registernum registerdate nclaim finaldisposal)
destring appnum appdate opennum opendate registernum registerdate nclaim , replace ignore(- . _)
format appnum opennum registernum %15.0g
keep app* open* register*

* Snapshot of the full previous release; it is not read again in the part of
* the script visible here and is removed with the rest of $temp at the end.
save $temp/basic_2018, replace

* Year is derived as floor(appnum/1e7) - 1e5 (e.g. 1020100012345 -> 2010);
* rows from 2010 onwards, or with a missing value, are dropped.
drop if floor(appnum/1e7) - 1e5 >= 2010

append using $temp/basic8417_2020
save $result/numbers, replace

	** Sub Numbers

	use $result/numbers, clear

	* numbersA: table for matching publication numbers to application numbers.
	* Only rows with derived year >= 2002 are used.
	preserve
	keep appnum opennum
	drop if opennum == .
	duplicates drop
	keep if floor(appnum/1e7) - 1e5 >= 2002
	* The Citation section merges m:1 on opennum, which requires opennum to be
	* unique here. Check it now so a violation is reported where the table is
	* built rather than at the later merge.
	isid opennum
	save $temp/numbersA, replace
	restore

	* numbersB: table for matching registration numbers to application numbers.
	* When one registration number matches several application numbers, the
	* row with the largest appnum (the latest-filed one) is kept.
	keep appnum registernum
	drop if registernum == .
	duplicates drop
	* The within-group order must be part of the by-statement: a plain
	* "bysort registernum:" re-sorts on registernum only, and because Stata's
	* sort is not stable the earlier appnum ordering would not be guaranteed.
	bysort registernum (appnum): keep if _n == _N
	save $temp/numbersB, replace
	
	

* Citation
* Builds $result/citation_updated from the previous release ($old/citation,
* rows with derived citing year < 2010) plus the newly downloaded CSV.
* Only citations of KR documents whose cited_code starts with "A" or "B" are
* kept from the new file.

import delimited $raw/citation_8417_2020.csv, encoding(UTF-8) clear
rename v1-v7 (citing cited_date cited_code cited_ctry cited_num cited_type cited_typecode)

keep if cited_ctry == "KR" & inlist(substr(cited_code, 1, 1), "A", "B")

destring cited_date cited_num, replace
format citing cited_num %15.0g

* Drop cited numbers whose value divided by 1e12 floors to 2.
drop if floor(cited_num/1e12) == 2

* Overwrite cited_typecode with a text label for the four known cited_type
* codes; any other code keeps the value read from the CSV.
replace cited_typecode = "발송문서" if cited_type == "E0801"
replace cited_typecode = "심사관인용" if cited_type == "E0802"
replace cited_typecode = "선행기술조사보고서" if cited_type == "E0805"
replace cited_typecode = "출원인인용" if cited_type == "E0806"

	** "A" codes: cited_num is looked up as a publication number (numbersA)

	preserve
	keep if substr(cited_code, 1, 1) == "A"
	save $temp/citA, replace

	rename cited_num opennum
	merge m:1 opennum using $temp/numbersA, keep(master match) nogenerate
	rename (opennum appnum) (cited_num cited_appnum)
	save $temp/citA_result, replace
	restore

	** "B" codes: cited_num is looked up as a registration number (numbersB)

	keep if substr(cited_code, 1, 1) == "B"
	save $temp/citB, replace

	rename cited_num registernum
	merge m:1 registernum using $temp/numbersB, keep(master match) nogenerate
	rename (registernum appnum) (cited_num cited_appnum)
	save $temp/citB_result, replace

** append, save

* The "B" rows are already in memory; stack the "A" rows underneath.
append using $temp/citA_result

* NOTE(review): cited_appnum, produced by the two lookups above, is not in this
* keep list, so it is not carried into the saved file - confirm this is intended.
keep citing cited_num cited_code cited_typecode

* Fix the numeric codes of the four known labels before encoding, so that
* -encode- reuses this value label instead of numbering alphabetically.
label define typecode 1 "발송문서" 2 "심사관인용" 3 "선행기술조사보고서" 4 "출원인인용"
encode cited_typecode, gen(typecode) label(typecode)
drop cited_typecode
rename typecode cited_typecode
label values cited_typecode typecode

save $temp/cit8417_2020, replace

* --- previous release
* Year is derived as floor(citing/1e7) - 1e5; rows from 2010 onwards, or with
* a missing value, are dropped.
use $old/citation, clear
drop if floor(citing/1e7) - 1e5 >= 2010

append using $temp/cit8417_2020
save $result/citation_updated, replace



*** deleting all temp files
* Remove the scratch folder named by the $temp global (not a hard-coded name),
* using the shell command that matches the operating system Stata runs on.
if "`c(os)'" == "Windows" {
	shell rmdir /s /q "$temp"
}
else {
	shell rm -r "$temp"
}
